
set more off
clear

**********************
* Append the data
**********************

** Set directory
cd "/Users/chuangchen/Library/CloudStorage/OneDrive-UniversityofPittsburgh/LASA/Datos" 

* Argentina is the base file; every other country file is stacked under it,
* in the order listed below.
use "Argentina_2003_2015.dta", clear

* Storage-type promotions Stata reported while appending (kept from the
* original inline notes):
*   Colombia   : partido str8  -> str10
*   Brazil     : nestu   byte  -> float
*   Costa Rica : pais    str9  -> str10
*   Ecuador    : partido str10 -> str11
*   Peru       : partido str11 -> str18
*   DR         : pais    str10 -> str18
*   Mexico     : eco_natural byte -> float
*   Panama     : partido str18 -> str22
* NOTE(review): -force- also lets string/numeric type clashes through without
* an error; confirm no variable is string in one file and numeric in another.
foreach dtafile in "Colombia_2000_2020.dta" ///
                   "Honduras_2002_2014.dta" ///
                   "Chile.dta" ///
                   "Brazil_2000_2020.dta" ///
                   "CostaRica_2002_2018.dta" ///
                   "Ecuador_2002_2021.dta" ///
                   "Uruguay_2000_2020.dta" ///
                   "Peru_2001_2021.dta" ///
                   "Bolivia_2002_2020.dta" ///
                   "DR_2002_2021.dta" ///
                   "Nicaragua_2002_2022.dta" ///
                   "ElSalvador_2000_2011.dta" ///
                   "Guatemala_2000_2012.dta" ///
                   "Mexico.dta" ///
                   "Panama.dta" ///
                   "Paraguay.dta" {
	append using "`dtafile'", force
}

* Move the country identifier to the first column.
order pais

* Observations coded as wave 1.5 are folded into wave 2.
replace wave = 2 if wave == 1.5

* Numeric country code: 1 = Argentina ... 17 = Uruguay, following the order
* of the list below. Countries whose name is not in the list stay missing.
gen pais_n = .
local code = 0
foreach cname in "Argentina" "Bolivia" "Brazil" "Chile" "Colombia" ///
                 "Costa Rica" "Dominican Republic" "Ecuador" "El Salvador" ///
                 "Guatemala" "Honduras" "Mexico" "Nicaragua" "Panama" ///
                 "Paraguay" "Peru" "Uruguay" {
	local ++code
	replace pais_n = `code' if pais == "`cname'"
}


* Remove every variable whose name begins with p1 ... p9
* (presumably the raw questionnaire items -- confirm against the codebook).
drop p1* p2* p3* p4* p5* p6* p7* p8* p9*



* Variable lists used by the loops further down.
* They are defined by plain macro copy (no "= exp"): the "=" form evaluates a
* string expression, which can truncate long lists on older Stata releases.

* Numeric country codes (values of pais_n).
global country 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17

* Item list that includes age but not val_ssm.
global variable ///
    val_abortion val_drugs religious rel_catholic rel_evangelical rel_other ///
    education female age eco_regulated eco_education_u eco_employment ///
    eco_companies eco_wellbeing eco_health eco_pensions eco_prices ///
    eco_education_p eco_education_s eco_housing eco_unemployment ///
    eco_environment eco_necessity eco_tax eco_private val_religious ///
    val_divorce eco_education_ps eco_tax2 eco_natural eco_inequality

* Item list that includes val_ssm but not age.
global variable2 ///
    val_ssm val_abortion val_drugs religious rel_catholic rel_evangelical ///
    rel_other education female eco_regulated eco_education_u eco_employment ///
    eco_companies eco_wellbeing eco_health eco_pensions eco_prices ///
    eco_education_p eco_education_s eco_housing eco_unemployment ///
    eco_environment eco_necessity eco_tax eco_private val_religious ///
    val_divorce eco_education_ps eco_tax2 eco_natural eco_inequality

global variable4 church_attend

* Some source files carry the misspelled name eco_inequaltiy; copy its values
* into eco_inequality where that one is missing, then drop the misspelled copy.
replace eco_inequality = eco_inequaltiy if eco_inequality == .
drop eco_inequaltiy

* Brazil, legis 2007: eco_regulated is rebuilt from reg_econ, doubled and
* rounded to the nearest integer.
replace eco_regulated = round(2 * reg_econ) if legis == 2007 & pais == "Brazil"

**********************
* Cleaning missing values
**********************
{
* Any code above 11 on the items in $variable2 is set to missing.
foreach item of global variable2 {
	summarize `item'
	replace `item' = . if `item' > 11
}

* For the economic items listed here the ceiling is tighter: codes above 8
* are set to missing.
global variable3 =" eco_employment eco_pensions eco_prices eco_education_p eco_education_s eco_inequality eco_education_u eco_housing eco_unemployment eco_environment eco_health eco_necessity eco_wellbeing eco_companies eco_services eco_private eco_natural eco_tax"
foreach item of global variable3 {
	summarize `item'
	replace `item' = . if `item' > 8
}

* partyN = number of observations in each party x country x wave cell.
sort pais wave partido
egen partyN = count(1), by(partido pais wave)
//drop if partyN<3 
}
* church_attend codes above 6 become missing everywhere except Brazil.
* Original author's note: Brazil is left out as a reminder that its coding
* has to be fixed first.
replace church_attend = . if church_attend > 6 & pais != "Brazil"

**********************
* Creating measure of dispersion. 
**********************
* Same item list as before, plus the two ideology scores ID1 and ID2.
global variable2 ///
    val_ssm val_abortion val_drugs religious rel_catholic rel_evangelical ///
    rel_other education female eco_regulated eco_education_u eco_employment ///
    eco_companies eco_wellbeing eco_health eco_pensions eco_prices ///
    eco_education_p eco_education_s eco_housing eco_unemployment ///
    eco_environment eco_necessity eco_tax eco_private val_religious ///
    val_divorce eco_education_ps eco_tax2 eco_natural eco_inequality ///
    ID1 ID2

* For every item x this creates, within each country x wave x party cell:
*   av_x        total of x over the cell (missing answers contribute 0)
*   av_all_x    mean of x over the cell
*   x_minusthis cell total minus the legislator's own answer
*   p_x         leave-one-out mean: average of the OTHER members who answered
*   sq_x        squared distance between the legislator and p_x
foreach x of global variable2 {
	summarize `x'
	egen av_`x' = total(`x'), by(pais wave partido)
	egen av_all_`x' = mean(`x'), by(pais wave partido)

	// Number of cell members with a non-missing answer on this item.
	// The leave-one-out mean must divide by this count minus one; dividing
	// by partyN-1 (all members, answered or not) understates the mean
	// whenever somebody in the party skipped the item.
	tempvar answered
	egen `answered' = count(`x'), by(pais wave partido)

	gen `x'_minusthis = av_`x' - `x' // missing when the legislator did not answer
	gen p_`x' = `x'_minusthis / (`answered' - 1) // missing when nobody else answered
	gen sq_`x' = (`x' - p_`x')^2 // missing whenever x or p_x is missing

	drop `answered'
}

* Alternative approach: first standardize each item, then build the distance
* measure on the standardized values.
*
* Each item is rescaled to [0, 1] with its POOLED minimum and maximum, i.e.
* the r(min) / r(max) left behind by -summarize- on the full sample.
* The original ran -generate- under a "by pais wave partido:" prefix. That
* prefix could not change the result, because r(min) and r(max) are single
* pooled scalars, but it made the command abort with "not sorted" whenever the
* data were not in that exact sort order. It is therefore dropped.
* NOTE(review): if a within-party rescaling was intended, the min and max
* would have to be computed per group instead -- confirm.
foreach x of global variable2 {
	summarize `x'
	gen `x'_std = (`x' - r(min)) / (r(max) - r(min))
}

* Dispersion on the standardized items. For every x in $variable2 this builds,
* within each country x wave x party cell:
*   av_x_std        total of x_std over the cell (missing answers contribute 0)
*   av_all_x_std    mean of x_std over the cell
*   x_std_minusthis cell total minus the legislator's own value
*   p_x_std         leave-one-out mean over the OTHER members who answered
*   sq_x_std        squared distance between the legislator and p_x_std
foreach x of global variable2 {
	egen av_`x'_std = total(`x'_std), by(pais wave partido)
	egen av_all_`x'_std = mean(`x'_std), by(pais wave partido)

	// Count of cell members with a non-missing standardized value. Dividing
	// by partyN-1 instead would count members who never answered the item
	// and understate the leave-one-out mean.
	tempvar answered
	egen `answered' = count(`x'_std), by(pais wave partido)

	gen `x'_std_minusthis = av_`x'_std - `x'_std // missing when the legislator did not answer
	gen p_`x'_std = `x'_std_minusthis / (`answered' - 1) // missing when nobody else answered
	gen sq_`x'_std = (`x'_std - p_`x'_std)^2 // missing whenever either term is missing

	drop `answered'
}


* eco_employment and eco_pensions 

**********************
* Creating dummy for extreme ideology
**********************
* far_right / far_right2 = 1 when ID1 / ID2 is 8 or more, 0 otherwise.
* far_left  / far_left2  = 1 when ID1 / ID2 is 3 or less, 0 otherwise.
* All four are missing when the underlying score is missing.
*
* missing() is used instead of "!= ." so that extended missing values
* (.a-.z) are excluded too. They compare larger than any number and would
* otherwise be coded as far right.
* NOTE(review): ID1 and ID2 are not in the item list that the ">11 -> missing"
* cleaning loop runs over, so any out-of-range codes still present would be
* counted as far right here -- confirm against the codebook.
gen far_right = (ID1 >= 8) if !missing(ID1)
gen far_left  = (ID1 <= 3) if !missing(ID1)

gen far_right2 = (ID2 >= 8) if !missing(ID2)
gen far_left2  = (ID2 <= 3) if !missing(ID2)

**********************
* Creating dummy for values and econ
**********************

* pro_abortion = 1 when val_abortion is 6 or more, 0 when it is below 6,
* and missing when val_abortion is missing.
gen pro_abortion = (val_abortion >= 6) if val_abortion != .

* pro_regulation = 1 when eco_regulated is 5 or less, 0 when it is above 5,
* and missing when eco_regulated is missing.
gen pro_regulation = (eco_regulated <= 5) if eco_regulated != .




**********************
* Merge party founding year and save the data
**********************

* Attach year_founding to every observation by party name.
* keep(master match) stops parties that appear only in the founding-year file
* from being added as empty observations; nogenerate replaces the former
* "drop _merge".
* NOTE(review): the key is partido alone, so identically named parties in
* different countries would receive the same founding year -- confirm the
* names are unique across countries.
merge m:1 partido using "party foundation year.dta", keep(master match) nogenerate

* Party age at the time of the legislature: legis minus founding year.
gen age_party = legis - year_founding

save "all_countries.dta", replace
